library(Seurat)
## Loading required package: ggplot2
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Loading required package: Matrix
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
seurobj <- readRDS('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/10x-180831')
aligned <- readRDS('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/10x-180831-aligned')
aligned_T1_old <- readRDS('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/10x-180504-T1-aligned')
For now the data was only filtered on 0.1 percent.mito. The subcluster from T1 with low UMI and gene counts was removed.
VlnPlot(seurobj, c("nGene", "percent.mito", "nUMI"), group.by='timepoint', nCol = 1, point.size.use=-1, size.x.use = 10)
GenePlot(seurobj, 'nUMI', 'nGene', cex.use = 0.5)
PCElbowPlot(seurobj, num.pc=50) #The elbow is at 6 pcs. Run tSNE on both 6, 18 and 30 PC's.
Interesting to see: T4 and T5 contain a lot more variation than T1, T2 and T3, and PC2 seems to split T4 and T5. Could the split in PC2 describe the cells developing into white or brown?
PCAPlot(seurobj, group.by='timepoint', pt.size=0.1)
If EBF2 marks brown then not.
FeaturePlot(seurobj, reduction.use='pca', features.plot='EBF2', pt.size=1, cols.use=c('gray', 'blue'), no.legend=F)
TSNE on 6 PC’s.
DimPlot(seurobj, reduction.use='tsne6', group.by='timepoint', pt.size=0.1)
The first 6 PCs explain most of the variation (as we could see in the elbow plot above), but they don’t capture the more subtle differences. For example, when only using 6 PC’s the mixture cluster cannot be found:
DimPlot(seurobj, reduction.use='tsne6', pt.size=0.1, cells.highlight=seurobj@cell.names[seurobj@meta.data$res.0.5 == 11], cols.use='grey', cols.highlight='blue')
TSNE on 18 PC’s.
DimPlot(seurobj, reduction.use='tsne18', group.by='timepoint', pt.size=0.1)
DimPlot(seurobj, reduction.use='tsne18', group.by='Phase', pt.size=0.1)
Cluster 11 = mixture cluster.
DimPlot(seurobj, reduction.use='tsne18', group.by='res.0.5', pt.size=0.1)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'nUMI', cols.use=c('grey', 'blue'), no.legend=F)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'percent.mito', cols.use=c('grey', 'blue'), no.legend = F)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'nGene', cols.use=c('grey', 'blue'), no.legend = F)
Shared correlation per CC.
#Make again
#load('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/MetageneBicorPlots')
#ccplot_aligned
12 CC’s were used for tSNE.
TSNEPlot(aligned, group.by='timepoint', pt.size=0.1)
Shared correlation per CC.
#ccplot_aligned_T1_old make new plot
15 CC’s were used for tSNE.
#replacing res.0.5 column of T1 with 'T1'
aligned_T1_old@meta.data[aligned_T1_old@meta.data$sample_name == 'T1', 'res.0.5'] = 'T1'
TSNEPlot(aligned_T1_old, group.by='sample_name', pt.size=0.1)
TSNEPlot(aligned_T1_old, group.by='res.0.5', pt.size=0.1)
Highligted mixture cluster
DimPlot(aligned_T1_old, reduction.use='tsne', cells.highlight=aligned_T1_old@cell.names[which(aligned_T1_old@meta.data$res.0.5 %in% 12)], pt.size.use=0.1, cols.highlight = 'blue', cols.use='grey')
Highlighted T1 cells. It does not look like T1 cells are in the mixture cluster on the right. Could it be the big cluster in T1 with low UMI and gene counts affects the sample alignment and that’s the reason we are not seeing cells of T1 in the mixture cluster? Or because T1 consists of four samples? There are also some cells from the mixture cluster scattered accross the whole dataset, so maybe the mixture cells from T1 are there but also scattered accross the data.
DimPlot(aligned_T1_old, reduction.use='tsne', cells.highlight=aligned_T1_old@cell.names[which(aligned_T1_old@meta.data$res.0.5 %in% 'T1')], pt.size.use=0.1, cols.use = 'grey', cols.highlight = 'blue')
We can also look at some of the marker genes for the mixture cluster.
T1 <- readRDS('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/10x-180831-T1')
Positive markers. (the clustering was performed on the whole dataset).
Clusters 1, 2, 3, 4, 7, 11 belong to T1-T3. Clusters 5, 6, 8, 9, 10 belong to T4-T5.
VlnPlot(seurobj, group.by='res.0.5', nCol=2, features.plot=c('ACTB', 'TPM2', 'ANXA2', 'LDHA', 'PRDX1', 'MYL12B', 'MYL12A', 'CCND1', 'RAB13', 'MAP1B'), point.size.use=-1)
Negative markers. MALAT1 and NEAT1 seem to be the best markers.
VlnPlot(seurobj, group.by='res.0.5', nCol=2, features.plot=c('MALAT1', 'COL1A1', 'NEAT1', 'FN1', 'COL1A2', 'HIST1H4C', 'CYR61', 'COL3A1'), point.size.use=-1)
It looks like cluster 11 marks the mixture cluster. The clustering here was performed on the whole dataset. Let’s see where cluster 11 is:
TSNEPlot(seurobj, group.by='res.0.5', pt.size=0.1)
Does cluster 11 also contain cells from other timepoints?
table(seurobj@meta.data$timepoint[seurobj@meta.data$res.0.5 == 11])
##
## T1 T2 T3 T4 T5 T6
## 13 29 32 3 3 0
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'EBF2', cols.use=c('grey', 'blue'), no.legend = F)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'TM4SF1', cols.use=c('grey', 'blue'), no.legend = F)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'LY6K', cols.use=c('grey', 'blue'), no.legend = F)
FeaturePlot(seurobj, reduction.use='tsne18', features.plot = 'PDGFRA', cols.use=c('grey', 'blue'), no.legend = F)
markers.time <- read.table('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/markergenes/180831/markers_time-combined_negbinom', sep='\t', header=T)
markers.time.1 <- markers.time[markers.time$cluster == 'T1-T2-T3', ]
Positive marker genes T1-T2-T3
as.data.frame(markers.time.1[order(-markers.time.1$avg_logFC),])
Negative markers T1-T2-T3 or positive markers T4-T5
as.data.frame(markers.time.1[order(markers.time.1$avg_logFC),])
Interesting: FN1 was a negative marker gene for the mixture cluster.
Positive marker genes T1-T2-T3:
FeaturePlot(seurobj, features.plot=as.vector(markers.time.1[order(-markers.time.1$avg_logFC),'gene'][1:10]), cols.use=c('gray', 'blue'), no.legend=F, nCol=2)
VlnPlot(seurobj, features.plot=as.vector(markers.time.1[order(-markers.time.1$avg_logFC),'gene'][1:10]), group.by='timepoint', nCol=2, point.size.use = -1)
Positive marker genes T4-T5
FeaturePlot(seurobj, features.plot=as.vector(markers.time.1[order(markers.time.1$avg_logFC),'gene'][1:10]), cols.use=c('gray', 'blue'), no.legend=F, nCol=2)
VlnPlot(seurobj, features.plot=as.vector(markers.time.1[order(markers.time.1$avg_logFC),'gene'][1:10]), group.by='timepoint', nCol=2, point.size.use = -1)
FAB4: FABP4 is a known mature adipocyte marker. FABP4 encodes the fatty acid binding protein found in adipocytes. Fatty acid binding proteins are a family of small, highly conserved, cytoplasmic proteins that bind long-chain fatty acids and other hydrophobic ligands. It is thought that FABPs roles include fatty acid uptake, transport, and metabolism. [provided by RefSeq, Jul 2008] \
CFD: This gene encodes a member of the S1, or chymotrypsin, family of serine peptidases. This protease catalyzes the cleavage of factor B, the rate-limiting step of the alternative pathway of complement activation. This protein also functions as an adipokine, a cell signaling protein secreted by adipocytes, which regulates insulin secretion in mice. \
APOE: The protein encoded by this gene is a major apoprotein of the chylomicron. It binds to a specific liver and peripheral cell receptor, and is essential for the normal catabolism of triglyceride-rich lipoprotein constituents. This gene maps to chromosome 19 in a cluster with the related apolipoprotein C1 and C2 genes. Mutations in this gene result in familial dysbetalipoproteinemia, or type III hyperlipoproteinemia (HLP III), in which increased plasma cholesterol and triglycerides are the consequence of impaired clearance of chylomicron and VLDL remnants. [provided by RefSeq, Jun 2016] \
G0S2: G0S2 (G0/G1 Switch 2) is a Protein Coding gene. Among its related pathways are Metabolism and Regulation of lipid metabolism by Peroxisome proliferator-activated receptor alpha (PPARalpha). \
SCD: This gene encodes an enzyme involved in fatty acid biosynthesis, primarily the synthesis of oleic acid. The protein belongs to the fatty acid desaturase family and is an integral membrane protein located in the endoplasmic reticulum. Transcripts of approximately 3.9 and 5.2 kb, differing only by alternative polyadenlyation signals, have been detected. A gene encoding a similar enzyme is located on chromosome 4 and a pseudogene of this gene is located on chromosome 17. [provided by RefSeq, Sep 2015]
Marker genes for mature brown/beige compared to white mentioned by Seale 2016: UCP1, DIO2, CIDEA, PPARGC1A, PPARA, COX7A1, COX8B, PRDM16, EBF2. \
VlnPlot(seurobj, features.plot=c('UCP1', 'DIO2', 'CIDEA', 'PPARGC1A', 'PPARA', 'COX7A1', 'PRDM16', 'EBF2'), group.by='timepoint', point.size.use = -1, nCol=2)
COX7A1 seems like the only interesting gene. Let’s see if it’s higher expressed in certain clusters.
VlnPlot(seurobj, features.plot='COX7A1', group.by='res.1.5', point.size.use = -1)
It does seem to be higher expressed in cluster 16 (T5).
DimPlot(seurobj, reduction.use='tsne18', group.by='res.1.5', pt.size=0.1, do.label=T)
FeaturePlot(seurobj, cols.use=c('gray', 'blue'), no.legend=F, features.plot=toupper('Cox7a1'), reduction.use='tsne18')
Negbinom test between clusters res.1.5.
markers_res1.5 <- read.table('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/markergenes/180831/markers_res.1.5_negbinom', header=T)
The top 10 markers per cluster
#markers_res1.5 %>% arrange(cluster, desc(avg_logFC)) %>% top_n(10, cluster)
top10 <- markers_res1.5 %>% arrange(desc(avg_logFC)) %>% group_by(cluster) %>% top_n(10, avg_logFC) %>% arrange(cluster)
top10
DimPlot(seurobj, reduction.use='tsne18', group.by='res.1.5', pt.size=0.1, do.label=T)
top10_orderedLogFC <- top10[order(-top10$avg_logFC),]
top10_orderedLogFC
Genes with the highest avg_logFC.
FeaturePlot(seurobj, reduction.use='tsne18', features.plot=as.vector(unique(top10_orderedLogFC$gene[1:20])), cols.use=c('gray', 'blue'), no.legend=F, nCol = 2)
Cluster 6 and 7
FeaturePlot(seurobj, reduction.use='tsne18', features.plot=c('PLA2G2A', 'SAA1', 'MT1X'), cols.use=c('gray', 'blue'), no.legend=F, nCol = 2)
Heatmap of the top 10 marker genes per cluster
DoHeatmap(SetAllIdent(seurobj, id='res.1.5'), genes.use=top10$gene, remove.key=T, slim.col.label = T)
DimPlot(seurobj, group.by='res.1.5', reduction.use='tsne18', pt.size=0.1, do.label=T)
markers.mixture <- read.table('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/markergenes/180831/markers_res.0.7_negbinom', sep='\t', header=T)
markers.mixture <- markers.mixture[markers.mixture$avg_logFC < 0.05,]
markers.mixture <- markers.mixture[markers.mixture$cluster == 13,]
Positive markers mixture cluster.
as.data.frame(markers.mixture[order(-markers.mixture$avg_logFC),])
Negative markers mixture cluster
as.data.frame(markers.mixture[order(markers.mixture$avg_logFC),])
MALAT1 and NEAT1 again in the top. A lot of the negative markers for the mixture cluster are positive markers for T4-T5 (see above). Probably because the mixture cluster mainly contains cells from T1, T2 and T3 and only a few from T4 and T5. Looking at the expression of those genes in the mixture cluster grouped by timepoint, we can see that they go up as well (CFD, MT1X, APOE, G0S2), so they are found as markers for the mixture cluster because we only have a few T4-T5 cells in our mixture cluster.
VlnPlot(SetAllIdent(seurobj, id='res.0.5'), ident.include=11, features.plot=as.vector(markers.mixture[order(markers.mixture$avg_logFC),'gene'][1:10]), group.by='timepoint', point.size.use=0.5, nCol=5)
stress_genes <- read.table('/raid5/projects/timshel/sc-arc_lira/src/data-genelists/171219-van_den_Brink2017-genes_affected_by_dissociation.csv', header=T) %>% pull(1)
de_genes <- read.table('/projects/pytrik/sc_adipose/analyze_10x_fluidigm/data/markergenes/180831/markers_res.0.7_negbinom', header=T)
de_genes <- de_genes[de_genes$p_val_adj < 0.05,]
Cluster 13 is the mixture cluster. Check MALAT1 and NEAT1 to be sure:
VlnPlot(seurobj, group.by='res.0.7', features.plot=c('MALAT1', 'NEAT1'), point.size.use =-1)
Are any of the stress genes DE genes for the mixture cluster?
de_genes_pos <- de_genes[de_genes$avg_logFC > 0, ]
de_genes_neg <- de_genes[de_genes$avg_logFC < 0, ]
intersect_pos <- list()
intersect_neg <- list()
n_pos <- list()
n_neg <- list()
perc_pos <- list()
perc_neg <- list()
for (i in unique(de_genes$cluster)){
c <- paste('cluster', i)
genes_pos <- de_genes_pos[de_genes_pos$cluster == i, 'gene']
genes_neg <- de_genes_neg[de_genes_neg$cluster == i, 'gene']
intersect_pos[[c]] <- length(intersect(toupper(stress_genes), genes_pos))
intersect_neg[[c]] <- length(intersect(toupper(stress_genes), genes_neg))
n_pos[[c]] <- length(genes_pos)
n_neg[[c]] <- length(genes_neg)
perc_pos[[c]] <- (intersect_pos[[c]] / n_pos[[c]]) * 100
perc_neg[[c]] <- (intersect_neg[[c]] / n_neg[[c]]) * 100
}
data.frame(
shared.pos.genes=unlist(intersect_pos),
shared.neg.genes=unlist(intersect_neg),
n_pos=unlist(n_pos),
n_neg=unlist(n_neg),
perc_pos=unlist(perc_pos),
perc_neg=unlist(perc_neg)
)
mixture_genes_pos <- de_genes_pos[de_genes_pos$cluster == 13, ]
mixture_genes_neg <- de_genes_neg[de_genes_neg$cluster == 13, ]
intersect(toupper(stress_genes), mixture_genes_pos$gene)
## [1] "ACTG1" "HSP90AB1"
VlnPlot(seurobj, features.plot=intersect(toupper(stress_genes), mixture_genes_pos$gene), point.size.use=-1, group.by='res.0.7', nCol=2)
VlnPlot(seurobj, features.plot='TXN', point.size.use=-1, group.by='res.0.7', nCol=2)